Aminet 25

home *** CD-ROM | disk | FTP | other *** search

/ Aminet 25 / Aminet 25 (1998)(GTI - Schatztruhe)[!][Jun 1998].iso / Aminet / game / shoot / ADoom_src_1_2.lha / ADoom_src / amiga_draw.s < prev next >

Wrap

Text File | 1998-03-01 | 39KB | 1,782 lines

* * amiga_draw.s - optimized rendering * by Aki Laukkanen <amlaukka@cc.helsinki.fi> * * This file is public domain. * mc68020 include "exec/types.i" include "exec/funcdef.i" include "exec/exec_lib.i" ;----------------------------------------------------------------------- SCREENWIDTH equ 320 FRACBITS equ 16 FRACUNIT equ (1<<FRACBITS) * * global functions * xdef _init_r_draw xdef @init_r_draw ;; xdef _R_DrawColumn_030 ; high detail ;; xdef @R_DrawColumn_030 xdef _R_DrawColumn_040 ; high detail xdef @R_DrawColumn_040 xdef _R_DrawSpan_040 xdef @R_DrawSpan_040 xdef _R_DrawColumn_060 xdef @R_DrawColumn_060 xdef _R_DrawSpan_060 xdef @R_DrawSpan_060 xdef _R_DrawFuzzColumn xdef @R_DrawFuzzColumn ;; xdef _R_DrawTranslatedColumn ;; xdef @R_DrawTranslatedColumn xdef _R_DrawSpanLow ; low detail xdef @R_DrawSpanLow xdef _R_DrawColumnLow xdef @R_DrawColumnLow xdef _R_DrawFuzzColumnLow xdef @R_DrawFuzzColumnLow ;; xdef _R_DrawTranslatedColumnLow ;; xdef @R_DrawTranslatedColumnLow xdef _R_DrawMaskedColumn xdef @R_DrawMaskedColumn xdef _V_DrawPatch xdef @V_DrawPatch xdef _V_DrawPatchDirect xdef @V_DrawPatchDirect * * needed symbols/labels * xref _SysBase xref _SCREENWIDTH xref _SCREENHEIGHT xref _dc_yl xref _dc_yh xref _dc_x xref _columnofs ;; xref _ylookup xref _ylookup2 xref _dc_iscale xref _centery xref _dc_texturemid xref _dc_source xref _dc_colormap xref _ds_xfrac xref _ds_yfrac xref _ds_x1 xref _ds_y xref _ds_x2 xref _ds_xstep xref _ds_ystep xref _ds_source xref _ds_colormap xref _fuzzoffset xref _fuzzpos xref _viewheight xref _dc_translation xref _colormaps ;----------------------------------------------------------------------- section text,code near a4,-2 ; low detail drawing functions ;----------------------------------------------------------------------- ; patch _SCREENWIDTH into draw routines (self-modifying code) and flush caches _init_r_draw @init_r_draw movem.l a6,-(sp) lea @init_r_draw(pc),a0 move.l _SCREENWIDTH(a4),d0 ; d0 = _SCREENWIDTH move.w d0,sw12_1-@init_r_draw+2(a0) move.w d0,sw12_3-@init_r_draw+2(a0) move.w d0,sw12_4-@init_r_draw+2(a0) move.w d0,sw12_4a-@init_r_draw+2(a0) move.w d0,sw12_5-@init_r_draw+2(a0) move.w d0,sw12_5a-@init_r_draw+2(a0) move.w d0,sw12_6-@init_r_draw+2(a0) move.w d0,sw12_7-@init_r_draw+2(a0) move.w d0,sw12_8-@init_r_draw+2(a0) add.l d0,d0 ; d0 = 2 * _SCREENWIDTH move.w d0,sw22_1-@init_r_draw+2(a0) move.w d0,sw22_3-@init_r_draw+2(a0) move.w d0,sw22_4-@init_r_draw+2(a0) move.w d0,sw22_4a-@init_r_draw+2(a0) move.w d0,sw22_5-@init_r_draw+2(a0) move.w d0,sw22_5a-@init_r_draw+2(a0) move.w d0,sw22_6-@init_r_draw+2(a0) move.w d0,sw22_7-@init_r_draw+2(a0) move.w d0,sw22_8-@init_r_draw+2(a0) add.l _SCREENWIDTH(a4),d0 ; d0 = 3 * _SCREENWIDTH move.w d0,sw32_1-@init_r_draw+2(a0) move.w d0,sw32_3-@init_r_draw+2(a0) move.w d0,sw32_4-@init_r_draw+2(a0) move.w d0,sw32_4a-@init_r_draw+2(a0) move.w d0,sw32_5-@init_r_draw+2(a0) move.w d0,sw32_5a-@init_r_draw+2(a0) move.w d0,sw32_6-@init_r_draw+2(a0) move.w d0,sw32_7-@init_r_draw+2(a0) move.w d0,sw32_8-@init_r_draw+2(a0) add.l _SCREENWIDTH(a4),d0 ; d0 = 4 * _SCREENWIDTH move.w d0,sw42_1-@init_r_draw+2(a0) move.w d0,sw42_3-@init_r_draw+2(a0) move.w d0,sw42_4-@init_r_draw+2(a0) move.w d0,sw42_5-@init_r_draw+2(a0) move.w d0,sw42_6-@init_r_draw+2(a0) move.w d0,sw42_8-@init_r_draw+2(a0) move.l _SCREENWIDTH(a4),d0 neg.l d0 ; d0 = -_SCREENWIDTH move.w d0,swm10_1-@init_r_draw(a0) move.w d0,swm10_3-@init_r_draw(a0) move.w d0,swm10_4-@init_r_draw(a0) move.w d0,swm10_5-@init_r_draw(a0) move.w d0,swm10_6-@init_r_draw(a0) move.w d0,swm10_8-@init_r_draw(a0) add.l d0,d0 ; d0 = -2*_SCREENWIDTH move.w d0,swm20_1-@init_r_draw(a0) move.w d0,swm20_3-@init_r_draw(a0) move.w d0,swm20_4-@init_r_draw(a0) move.w d0,swm20_5-@init_r_draw(a0) move.w d0,swm20_6-@init_r_draw(a0) move.w d0,swm20_8-@init_r_draw(a0) sub.l _SCREENWIDTH(a4),d0 ; d0 = -3*_SCREENWIDTH move.w d0,swm30_1-@init_r_draw(a0) move.w d0,swm30_3-@init_r_draw(a0) move.w d0,swm30_4-@init_r_draw(a0) move.w d0,swm30_5-@init_r_draw(a0) move.w d0,swm30_6-@init_r_draw(a0) move.w d0,swm30_8-@init_r_draw(a0) movea.l _SysBase(a4),a6 jsr _LVOCacheClearU(a6) movem.l (sp)+,a6 rts ;----------------------------------------------------------------------- cnop 0,4 _R_DrawColumnLow @R_DrawColumnLow movem.l d3-d4/d6-d7/a2/a3,-(sp) move.l _dc_yh(a4),d7 ; count = _dc_yh - _dc_yl move.l _dc_yl(a4),d0 sub.l d0,d7 bmi.w l.end1 move.l _dc_x(a4),d1 ; dest = ylookup[_dc_yl] + columnofs[_dc_x] movea.l _ylookup2(a4),a0 ; = ylookup2[_dc_yl] + (_dc_x<<1) add.l d1,d1 ; dc_x <<= 1 move.l (a0,d0.l*4),a0 adda.l d1,a0 move.l _dc_colormap(a4),d4 move.l _dc_source(a4),a1 move.l _dc_iscale(a4),d1 ; frac = _dc_texturemid + (_dc_yl-centery)*fracstep sub.l _centery(a4),d0 muls.l d1,d0 add.l _dc_texturemid(a4),d0 moveq #$7f,d3 sw42_1 lea (SCREENWIDTH*4).w,a3 ; d7: cnt >> 2 ; a0: chunky ; a1: texture ; d0: frac (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU) ; d1: dfrac (.......................................) ; d3: $7f ; d4: light table aligned to 256 byte boundary ; a3: SCREENWIDTH move.l d7,d6 and.w #3,d6 swap d0 ; swap decimals and fraction swap d1 add.w l.width_tab1(pc,d6.w*2),a0 lsr.w #2,d7 move.w l.tmap_tab1(pc,d6.w*2),d6 and.w d3,d0 sub.w d1,d0 add.l d1,d0 ; setup the X flag jmp l.loop1(pc,d6.w) cnop 0,4 l.width_tab1 swm30_1 dc.w -3*SCREENWIDTH swm20_1 dc.w -2*SCREENWIDTH swm10_1 dc.w -1*SCREENWIDTH dc.w 0 l.tmap_tab1 dc.w l.01-l.loop1 dc.w l.11-l.loop1 dc.w l.21-l.loop1 dc.w l.31-l.loop1 l.loop1 l.31 move.b (a1,d0.w),d4 addx.l d1,d0 move.l d4,a2 move.w (a2),d6 and.w d3,d0 move.b (a2),d6 move.w d6,(a0) l.21 move.b (a1,d0.w),d4 addx.l d1,d0 move.l d4,a2 move.w (a2),d6 and.w d3,d0 move.b (a2),d6 sw12_1 move.w d6,SCREENWIDTH(a0) l.11 move.b (a1,d0.w),d4 addx.l d1,d0 move.l d4,a2 move.w (a2),d6 and.w d3,d0 move.b (a2),d6 sw22_1 move.w d6,SCREENWIDTH*2(a0) l.01 move.b (a1,d0.w),d4 addx.l d1,d0 move.l d4,a2 move.w (a2),d6 and.w d3,d0 move.b (a2),d6 sw32_1 move.w d6,SCREENWIDTH*3(a0) add.l a3,a0 l.loop_end1 dbf d7,l.loop1 l.end1 movem.l (sp)+,d3-d4/d6-d7/a2/a3 rts ;----------------------------------------------------------------------- cnop 0,4 _R_DrawSpanLow @R_DrawSpanLow movem.l d2-d7/a2-a4,-(sp) move.l _ds_y(a4),d0 move.l _ds_x1(a4),d1 ; dest = ylookup[_ds_y] + columnofs[_ds_x1] movea.l _ylookup2(a4),a0 ; = ylookup2[_ds_y] + _ds_x add.l d1,d1 move.l (a0,d0.l*4),a0 adda.l d1,a0 move.l _ds_x2(a4),d7 ; count = _ds_x2 - _ds_x1 move.l _ds_source(a4),a1 add.l d7,d7 move.l _ds_colormap(a4),a2 sub.l d1,d7 addq.l #2,d7 move.l _ds_xfrac(a4),d0 move.l _ds_yfrac(a4),d1 move.l _ds_xstep(a4),d2 move.l _ds_ystep(a4),d3 move.l a0,d4 ; notice, that this address must already be aligned by word btst #1,d4 beq.b l.skips2 move.l d0,d5 ; do the unaligned pixels move.l d1,d6 ; so we can write to longword swap d5 ; boundary in the main loop swap d6 and.w #$3f,d5 and.w #$3f,d6 ; this is the worst possible lsl.w #6,d6 ; way but hey, this is not a loop or.w d5,d6 move.b (a1,d6.w),d5 add.l d2,d0 move.b (a2,d5.w),(a0)+ add.l d3,d1 move.b (a2,d5.w),(a0)+ ; I know this is crap but spare me the comments subq.l #2,d7 l.skips2 move.l a2,d4 lea $1000(a1),a1 ; catch 22 move.l a0,a3 add.l d7,a3 move.l d7,d5 and.b #~7,d5 move.l a0,a4 add.l d5,a4 eor.w d0,d1 ; swap fraction parts for addx eor.w d2,d3 eor.w d1,d0 eor.w d3,d2 eor.w d0,d1 eor.w d2,d3 swap d0 swap d1 swap d2 swap d3 lsl.w #6,d1 lsl.w #6,d3 move.w #$ffc0,d6 move.w #$f03f,d7 lsr.w #3,d5 beq.b l.skip_loop22 sub.w d2,d0 add.l d2,d0 ; setup the X flag l.loop22 or.w d6,d0 ; Not really and exercise in optimizing or.w d7,d1 ; but I guess it's faster than 1x1 for 030 and.w d1,d0 ; where this low detail business is needed. addx.l d3,d1 move.b (a1,d0.w),d4 addx.l d2,d0 move.l d4,a2 move.w (a2),d5 or.w d6,d0 move.b (a2),d5 or.w d7,d1 and.w d1,d0 swap d5 addx.l d3,d1 move.b (a1,d0.w),d4 addx.l d2,d0 move.l d4,a2 move.w (a2),d5 or.w d6,d0 move.b (a2),d5 or.w d7,d1 and.w d1,d0 move.l d5,(a0)+ addx.l d3,d1 move.b (a1,d0.w),d4 addx.l d2,d0 move.l d4,a2 move.w (a2),d5 or.w d6,d0 move.b (a2),d5 or.w d7,d1 and.w d1,d0 swap d5 addx.l d3,d1 move.b (a1,d0.w),d4 addx.l d2,d0 move.l d4,a2 move.w (a2),d5 move.b (a2),d5 move.l d5,(a0)+ cmp.l a0,a4 bne.b l.loop22 l.skip_loop22 sub.w d2,d0 add.l d2,d0 bra.b l.loop_end22 l.loop32 or.w d6,d0 or.w d7,d1 and.w d1,d0 addx.l d3,d1 move.b (a1,d0.w),d4 addx.l d2,d0 move.l d4,a2 move.b (a2),(a0)+ move.b (a2),(a0)+ l.loop_end22 cmp.l a0,a3 bne.b l.loop32 l.end22 movem.l (sp)+,d2-d7/a2-a4 rts ;----------------------------------------------------------------------- cnop 0,4 _R_DrawTranslatedColumnLow @R_DrawTranslatedColumnLow movem.l d2-d4/d6-d7/a2/a3,-(sp) move.l _dc_yh(a4),d7 ; count = _dc_yh - _dc_yl move.l _dc_yl(a4),d0 sub.l d0,d7 bmi.w l.end3 move.l _dc_x(a4),d1 ; dest = ylookup[_dc_yl] + columnofs[_dc_x] movea.l _ylookup2(a4),a0 ; = ylookup2[_dc_yl] + _dc_x add.l d1,d1 move.l (a0,d0.l*4),a0 ;; movea.l _columnofs(a4),a1 ;; add.l (a1,d1.l*4),a0 adda.l d1,a0 ; new move.l _dc_translation(a4),d2 move.l _dc_colormap(a4),d4 move.l _dc_source(a4),a1 move.l _dc_iscale(a4),d1 ; frac = _dc_texturemid + (_dc_yl-centery)*fracstep sub.l _centery(a4),d0 muls.l d1,d0 add.l _dc_texturemid(a4),d0 moveq #$7f,d3 sw42_3 lea (SCREENWIDTH*4).w,a3 ; d7: cnt >> 2 ; a0: chunky ; a1: texture ; d0: frac (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU) ; d1: dfrac (.......................................) ; d3: $7f ; d4: light table aligned to 256 byte boundary ; d2: translation table aligned to 256 byte boundary ; a3: SCREENWIDTH move.l d7,d6 and.w #3,d6 swap d0 ; swap decimals and fraction swap d1 add.w l.width_tab3(pc,d6.w*2),a0 lsr.w #2,d7 move.w l.tmap_tab3(pc,d6.w*2),d6 and.w d3,d0 sub.w d1,d0 add.l d1,d0 ; setup the X flag jmp l.loop3(pc,d6.w) cnop 0,4 l.width_tab3 swm30_3 dc.w -3*SCREENWIDTH swm20_3 dc.w -2*SCREENWIDTH swm10_3 dc.w -1*SCREENWIDTH dc.w 0 l.tmap_tab3 dc.w l.03-l.loop3 dc.w l.13-l.loop3 dc.w l.23-l.loop3 dc.w l.33-l.loop3 l.loop3 l.33 move.b (a1,d0.w),d2 move.l d2,a2 addx.l d1,d0 move.b (a2),d4 move.l d4,a2 and.w d3,d0 move.w (a2),d6 move.b (a2),d6 move.w d6,(a0) l.23 move.b (a1,d0.w),d2 move.l d2,a2 addx.l d1,d0 move.b (a2),d4 move.l d4,a2 and.w d3,d0 move.w (a2),d6 move.b (a2),d6 sw12_3 move.w d6,SCREENWIDTH(a0) l.13 move.b (a1,d0.w),d2 move.l d2,a2 addx.l d1,d0 move.b (a2),d4 move.l d4,a2 and.w d3,d0 move.w (a2),d6 move.b (a2),d6 sw22_3 move.w d6,SCREENWIDTH*2(a0) l.03 move.b (a1,d0.w),d2 move.l d2,a2 addx.l d1,d0 move.b (a2),d4 move.l d4,a2 and.w d3,d0 move.w (a2),d6 move.b (a2),d6 sw32_3 move.b d6,SCREENWIDTH*3(a0) add.l a3,a0 l.loop_end3 dbf d7,l.loop3 l.end3 movem.l (sp)+,d2-d4/d6-d7/a2/a3 rts ;----------------------------------------------------------------------- cnop 0,4 _R_DrawFuzzColumnLow @R_DrawFuzzColumnLow movem.l d4/d6-d7/a2/a3,-(sp) move.l _viewheight(a4),d1 subq.l #1,d1 move.l _dc_yh(a4),d7 ; count = _dc_yh - _dc_yl cmp.l d1,d7 bne.b l.skip_yh4 subq.l #1,d1 move.l d1,d7 l.skip_yh4 move.l _dc_yl(a4),d0 bne.b l.skip_yl4 moveq #1,d0 l.skip_yl4 sub.l d0,d7 bmi.w l.end4 move.l _dc_x(a4),d1 ; dest = ylookup[_dc_yl] + columnofs[_dc_x] movea.l _ylookup2(a4),a0 ; = ylookup2[_dc_yl] + _dc_x add.l d1,d1 move.l (a0,d0.l*4),a0 adda.l d1,a0 move.l _colormaps(a4),d4 add.l #6*256,d4 movea.l _fuzzoffset(a4),a1 move.l _fuzzpos(a4),d0 ; bring it down l.pos_loop4 sub.l _SCREENHEIGHT(a4),d0 bpl l.pos_loop4 add.l _SCREENHEIGHT(a4),d0 add.l d0,a1 sw42_4 lea (SCREENWIDTH*4).w,a3 ; d7: cnt >> 2 ; a0: chunky ; a1: fuzzoffset ; d0: frac (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU) ; d1: dfrac (.......................................) ; d3: $7f ; d4: light table aligned to 256 byte boundary ; a3: SCREENWIDTH move.l d7,d6 and.w #3,d6 add.w l.width_tab4(pc,d6.w*2),a0 lsr.w #2,d7 move.w l.tmap_tab4(pc,d6.w*2),d6 jmp l.loop4(pc,d6.w) cnop 0,4 l.width_tab4 swm30_4 dc.w -3*SCREENWIDTH swm20_4 dc.w -2*SCREENWIDTH swm10_4 dc.w -1*SCREENWIDTH dc.w 0 l.tmap_tab4 dc.w l.04-l.loop4 dc.w l.14-l.loop4 dc.w l.24-l.loop4 dc.w l.34-l.loop4 l.loop4 l.34 move.l a0,a2 ; This is essentially add.l (a1)+,a2 ; just moving memory around. move.b (a2),d4 move.l d4,a2 move.w (a2),d6 move.b (a2),d6 move.w d6,(a0) l.24 sw12_4 lea SCREENWIDTH(a0),a2 add.l (a1)+,a2 move.b (a2),d4 move.l d4,a2 move.w (a2),d6 move.b (a2),d6 sw12_4a move.w d6,SCREENWIDTH(a0) l.14 sw22_4 lea 2*SCREENWIDTH(a0),a2 add.l (a1)+,a2 move.b (a2),d4 move.l d4,a2 move.w (a2),d6 move.b (a2),d6 sw22_4a move.w d6,2*SCREENWIDTH(a0) l.04 sw32_4 lea 3*SCREENWIDTH(a0),a2 add.l (a1)+,a2 move.b (a2),d4 move.l d4,a2 move.w (a2),d6 move.b (a2),d6 sw32_4a move.w d6,3*SCREENWIDTH(a0) add.l a3,a0 l.loop_end4 dbf d7,l.loop4 sub.l _fuzzoffset(a4),a1 move.l a1,_fuzzpos l.end4 movem.l (sp)+,d4/d6-d7/a2/a3 rts ;----------------------------------------------------------------------- ; high detail versions ;----------------------------------------------------------------------- cnop 0,4 _R_DrawFuzzColumn @R_DrawFuzzColumn movem.l d4/d6-d7/a2/a3,-(sp) move.l _viewheight(a4),d1 subq.l #1,d1 move.l _dc_yh(a4),d7 ; count = _dc_yh - _dc_yl cmp.l d1,d7 bne.b l.skip_yh5 subq.l #1,d1 move.l d1,d7 l.skip_yh5 move.l _dc_yl(a4),d0 bne.b l.skip_yl5 moveq #1,d0 l.skip_yl5 sub.l d0,d7 bmi.w l.end5 movea.l _ylookup2(a4),a0 ; dest = ylookup2[_dc_yl] + dc_x move.l (a0,d0.l*4),a0 adda.l _dc_x(a4),a0 move.l _colormaps(a4),d4 add.l #6*256,d4 movea.l _fuzzoffset(a4),a1 move.l _fuzzpos(a4),d0 l.pos_loop5 sub.l _SCREENHEIGHT(a4),d0 bpl l.pos_loop5 add.l _SCREENHEIGHT(a4),d0 add.l d0,a1 sw42_5 lea (SCREENWIDTH*4).w,a3 ; d7: cnt >> 2 ; a0: chunky ; a1: fuzzoffset ; d0: frac (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU) ; d1: dfrac (.......................................) ; d3: $7f ; d4: light table aligned to 256 byte boundary ; a3: SCREENWIDTH move.l d7,d6 and.w #3,d6 add.w l.width_tab5(pc,d6.w*2),a0 lsr.w #2,d7 move.w l.tmap_tab5(pc,d6.w*2),d6 jmp l.loop5(pc,d6.w) cnop 0,4 l.width_tab5 swm30_5 dc.w -3*SCREENWIDTH swm20_5 dc.w -2*SCREENWIDTH swm10_5 dc.w -1*SCREENWIDTH dc.w 0 l.tmap_tab5 dc.w l.05-l.loop5 dc.w l.15-l.loop5 dc.w l.25-l.loop5 dc.w l.35-l.loop5 l.loop5 l.35 move.l a0,a2 ; This is essentially add.l (a1)+,a2 ; just moving memory around. move.b (a2),d4 move.l d4,a2 ; Not 060 optimized but move.b (a2),(a0) ; if you have hordes of l.25 sw12_5 lea SCREENWIDTH(a0),a2 ; invisible monsters which add.l (a1)+,a2 ; slow down the game too much, move.b (a2),d4 ; do tell me. move.l d4,a2 sw12_5a move.b (a2),SCREENWIDTH(a0) l.15 sw22_5 lea 2*SCREENWIDTH(a0),a2 add.l (a1)+,a2 move.b (a2),d4 move.l d4,a2 sw22_5a move.b (a2),2*SCREENWIDTH(a0) l.05 sw32_5 lea 3*SCREENWIDTH(a0),a2 add.l (a1)+,a2 move.b (a2),d4 move.l d4,a2 sw32_5a move.b (a2),3*SCREENWIDTH(a0) add.l a3,a0 l.loop_end5 dbf d7,l.loop5 sub.l _fuzzoffset(a4),a1 move.l a1,_fuzzpos l.end5 movem.l (sp)+,d4/d6-d7/a2/a3 rts ;----------------------------------------------------------------------- cnop 0,4 _R_DrawTranslatedColumn ; no 060 version :( @R_DrawTranslatedColumn movem.l d2-d4/d6-d7/a2/a3,-(sp) move.l _dc_yh(a4),d7 ; count = _dc_yh - _dc_yl move.l _dc_yl(a4),d0 sub.l d0,d7 bmi.w l.end6 ;; move.l _dc_x(a4),d1 ; dest = ylookup[_dc_yl] + columnofs[_dc_x] movea.l _ylookup2(a4),a0 ; = ylookup2[_dc_yl] + _dc_x move.l (a0,d0.l*4),a0 ;; movea.l _columnofs(a4),a1 ;; add.l (a1,d1.l*4),a0 adda.l _dc_x(a4),a0 ; new move.l _dc_translation(a4),d2 move.l _dc_colormap(a4),d4 move.l _dc_source(a4),a1 move.l _dc_iscale(a4),d1 ; frac = _dc_texturemid + (_dc_yl-centery)*fracstep sub.l _centery(a4),d0 muls.l d1,d0 add.l _dc_texturemid(a4),d0 moveq #$7f,d3 sw42_6 lea (SCREENWIDTH*4).w,a3 ; d7: cnt >> 2 ; a0: chunky ; a1: texture ; d0: frac (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU) ; d1: dfrac (.......................................) ; d3: $7f ; d4: light table aligned to 256 byte boundary ; d2: translation table aligned to 256 byte boundary ; a3: SCREENWIDTH move.l d7,d6 and.w #3,d6 swap d0 ; swap decimals and fraction swap d1 add.w l.width_tab6(pc,d6.w*2),a0 lsr.w #2,d7 move.w l.tmap_tab6(pc,d6.w*2),d6 and.w d3,d0 sub.w d1,d0 add.l d1,d0 ; setup the X flag jmp l.loop6(pc,d6.w) cnop 0,4 l.width_tab6 swm30_6 dc.w -3*SCREENWIDTH swm20_6 dc.w -2*SCREENWIDTH swm10_6 dc.w -1*SCREENWIDTH dc.w 0 l.tmap_tab6 dc.w l.06-l.loop6 dc.w l.16-l.loop6 dc.w l.26-l.loop6 dc.w l.36-l.loop6 l.loop6 l.36 move.b (a1,d0.w),d2 move.l d2,a2 addx.l d1,d0 move.b (a2),d4 and.w d3,d0 move.l d4,a2 move.b (a2),(a0) l.26 move.b (a1,d0.w),d2 move.l d2,a2 addx.l d1,d0 move.b (a2),d4 and.w d3,d0 move.l d4,a2 sw12_6 move.b (a2),SCREENWIDTH(a0) l.16 move.b (a1,d0.w),d2 move.l d2,a2 addx.l d1,d0 move.b (a2),d4 and.w d3,d0 move.l d4,a2 sw22_6 move.b (a2),SCREENWIDTH*2(a0) l.06 move.b (a1,d0.w),d2 move.l d2,a2 addx.l d1,d0 move.b (a2),d4 and.w d3,d0 move.l d4,a2 sw32_6 move.b (a2),SCREENWIDTH*3(a0) add.l a3,a0 l.loop_end6 dbf d7,l.loop6 l.end6 movem.l (sp)+,d2-d4/d6-d7/a2/a3 rts ;----------------------------------------------------------------------- cnop 0,4 ; routine from j.selck@flensburg.netsurf.de (Aki's 040 routine is faster) ;_R_DrawColumn_030 ;@R_DrawColumn_030 ; movem.l d3-d7/a2-a5,-(sp) ; move.l _dc_yl(a4),d0 ; move.l _dc_yh(a4),d7 ; sub.l d0,d7 ; bmi.b 1$ ; move.l _dc_x(a4),d1 ; movea.l _columnofs(a4),a5 ; lea (a5,d1.l*4),a1 ; movea.l _ylookup(a4),a5 ; movea.l (a5,d0.l*4),a2 ; adda.l (a1),a2 ; move.l _dc_iscale(a4),d6 ; sub.l _centery(a4),d0 ; muls.l d6,d0 ; move.l _dc_texturemid(a4),d5 ; add.l d0,d5 ; movea.l _dc_source(a4),a3 ; movea.l _dc_colormap(a4),a4 ; moveq #127,d4 ; move.l _SCREENWIDTH(a4),d3 ; moveq #0,d1 ; ensure high bits of d1 are clear ; add.w d6,d5 ; frac += fracstep (also sets X flag) ; swap d5 ; swap(frac) ; swap d6 ; swap(fracstep) ; and.w d4,d5 ; (frac>>16)&127 ;2$ move.b (a3,d5.w),d1 ; dc_source[(frac>>FRACBITS)&127] ; move.b (a4,d1.w),(a2) ; *dest = dc_colormap[d1] ; addx.l d6,d5 ; swap(frac += fracstep), use & set X ; adda.l d3,a2 ; dest += SCREENWIDTH ; and.w d4,d5 ; (frac>>16)&127 ; dbra d7,2$ ;1$ movem.l (sp)+,d3-d7/a2-a5 ; rts ;----------------------------------------------------------------------- cnop 0,4 _R_DrawColumn_060 @R_DrawColumn_060 movem.l d2-d3/d5-d7/a2/a3,-(sp) move.l (_dc_yh),d7 ; count = _dc_yh - _dc_yl move.l (_dc_yl),d0 sub.l d0,d7 bmi.w l.end7 ;; move.l (_dc_x),d1 ; dest = ylookup[_dc_yl] + columnofs[_dc_x] movea.l (_ylookup2),a0 ; = ylookup2[_dc_yl] + _dc_x move.l (a0,d0.l*4),a0 ;; movea.l (_columnofs),a1 ;; add.l (a1,d1.l*4),a0 adda.l (_dc_x),a0 ; new move.l (_dc_colormap),a2 move.l (_dc_source),a1 move.l (_dc_iscale),d1 ; frac = _dc_texturemid + (_dc_yl-centery)*fracstep sub.l (_centery),d0 muls.l d1,d0 add.l (_dc_texturemid),d0 moveq #$7f,d3 move.l (_SCREENWIDTH),a3 move.l d7,d6 ; Do the leftover iterations in and.w #3,d6 ; this loop. addq.w #1,d6 l.skip_loop7 move.l d0,d5 swap d5 and.l d3,d5 move.b (a1,d5.w),d5 add.l d1,d0 move.b (a2,d5.w),(a0) add.l a3,a0 subq.w #1,d6 bne.b l.skip_loop7 ; d7: cnt >> 2 ; a0: chunky ; a1: texture ; a2: light_table ; d0: frac (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU) ; d1: dfrac*2 (.......................................) ; d2: frac+dfrac(.......................................) ; d3: $7f ; a3: SCREENWIDTH .skip7 lsr.l #2,d7 subq.l #1,d7 bmi.b l.end7 add.l a3,a3 move.l d0,d2 add.l a3,a3 add.l d1,d2 add.l d1,d1 eor.w d0,d2 ; swap the fraction part for addx eor.w d2,d0 ; assuming 16.16 fixed point eor.w d0,d2 swap d0 ; swap decimals and fraction swap d1 swap d2 moveq #0,d5 and.w d3,d2 and.w d3,d0 sub.w d1,d0 add.l d1,d0 ; setup the X flag move.b (a1,d2.w),d5 l.loop7 ; This should be reasonably scheduled for ; m68060. It should perform well on other processors ; too. That AGU stall still bothers me though. move.b (a1,d0.w),d6 ; stall + pOEP but allows sOEP addx.l d1,d2 ; pOEP only move.b (a2,d5.l),d5 ; pOEP but allows sOEP and.w d3,d2 ; sOEP move.b (a2,d6.l),d6 ; pOEP but allows sOEP sw12_7 move.b d5,SCREENWIDTH(a0) ; sOEP addx.l d1,d0 ; pOEP only move.b (a1,d2.w),d5 ; pOEP but allows sOEP and.w d3,d0 ; sOEP move.b d6,(a0) ; pOEP ; = ~4 cycles/pixel ; + cache misses ; The vertical writes are the true timehog of the loop ; because of the characteristics of the copyback cache ; operation. ; Better mark the chunky buffer as write through ; with the MMU and have all the horizontal writes ; be longs aligned to longword boundary. move.b (a1,d0.w),d6 addx.l d1,d2 move.b (a2,d5.l),d5 and.w d3,d2 move.b (a2,d6.l),d6 sw32_7 move.b d5,SCREENWIDTH*3(a0) addx.l d1,d0 move.b (a1,d2.w),d5 and.w d3,d0 sw22_7 move.b d6,SCREENWIDTH*2(a0) add.l a3,a0 l.loop_end7 dbf d7,l.loop7 ; it's faster to divide it to two lines on 060 ; and shouldn't be slower on 040. ; move.b (a1,d0.w),d6 ; new ; move.b (a2,d6.l),d6 ; new ; move.b d6,(a0) ; new l.end7 movem.l (sp)+,d2-d3/d5-d7/a2/a3 rts ;----------------------------------------------------------------------- cnop 0,4 ; 040 version _R_DrawColumn_040 @R_DrawColumn_040 movem.l d3-d4/d6-d7/a2/a3,-(sp) move.l _dc_yh(a4),d7 ; count = _dc_yh - _dc_yl move.l _dc_yl(a4),d0 sub.l d0,d7 bmi.w l.end8 movea.l _ylookup2(a4),a0 ; dest = ylookup2[_dc_yl] + _dc_x move.l (a0,d0.l*4),a0 adda.l _dc_x(a4),a0 move.l _dc_colormap(a4),d4 move.l _dc_source(a4),a1 move.l _dc_iscale(a4),d1 ; frac = _dc_texturemid + (_dc_yl-centery)*fracstep sub.l _centery(a4),d0 muls.l d1,d0 add.l _dc_texturemid(a4),d0 moveq #$7f,d3 sw42_8 lea (SCREENWIDTH*4).w,a3 ; d7: cnt >> 2 ; a0: chunky ; a1: texture ; d0: frac (uuuu uuuu uuuu uuuu 0000 0000 0UUU UUUU) ; d1: dfrac (.......................................) ; d3: $7f ; d4: light table aligned to 256 byte boundary ; a3: SCREENWIDTH move.l d7,d6 and.w #3,d6 swap d0 ; swap decimals and fraction swap d1 adda.w l.width_tab8(pc,d6.w*2),a0 lsr.w #2,d7 move.w l.tmap_tab8(pc,d6.w*2),d6 and.w d3,d0 sub.w d1,d0 add.l d1,d0 ; setup the X flag jmp l.loop8(pc,d6.w) cnop 0,4 l.width_tab8 swm30_8 dc.w -3*SCREENWIDTH swm20_8 dc.w -2*SCREENWIDTH swm10_8 dc.w -1*SCREENWIDTH dc.w 0 l.tmap_tab8 dc.w l.08-l.loop8 dc.w l.18-l.loop8 dc.w l.28-l.loop8 dc.w l.38-l.loop8 l.loop8 l.38 move.b (a1,d0.w),d4 addx.l d1,d0 move.l d4,a2 and.w d3,d0 move.b (a2),(a0) l.28 move.b (a1,d0.w),d4 addx.l d1,d0 move.l d4,a2 and.w d3,d0 sw12_8 move.b (a2),SCREENWIDTH(a0) l.18 move.b (a1,d0.w),d4 addx.l d1,d0 move.l d4,a2 and.w d3,d0 sw22_8 move.b (a2),SCREENWIDTH*2(a0) l.08 move.b (a1,d0.w),d4 addx.l d1,d0 move.l d4,a2 and.w d3,d0 sw32_8 move.b (a2),SCREENWIDTH*3(a0) adda.l a3,a0 l.loop_end8 dbf d7,l.loop8 l.end8 movem.l (sp)+,d3-d4/d6-d7/a2/a3 rts ;----------------------------------------------------------------------- ; This faster version by Aki M Laukkanen <amlaukka@cc.helsinki.fi> cnop 0,4 _R_DrawSpan_060 @R_DrawSpan_060 movem.l d2-d7/a2/a3,-(sp) move.l (_ds_y),d0 move.l (_ds_x1),d1 ; dest = ylookup[_ds_y] + columnofs[_ds_x1] movea.l (_ylookup2),a0 ; = ylookup2[_ds_y] + _ds_x1 move.l (a0,d0.l*4),a0 ;; movea.l (_columnofs),a1 ;; add.l (a1,d1.l*4),a0 adda.l d1,a0 ; new move.l (_ds_source),a1 move.l (_ds_colormap),a2 move.l (_ds_x2),d7 ; count = _ds_x2 - _ds_x1 sub.l d1,d7 addq.l #1,d7 move.l (_ds_xfrac),d0 move.l (_ds_yfrac),d1 move.l (_ds_xstep),d2 move.l (_ds_ystep),d3 move.l a0,d4 btst #0,d4 beq.b l.skipb9 move.l d0,d5 ; do the unaligned pixels move.l d1,d6 ; so we can write to longword swap d5 ; boundary in the main loop swap d6 and.w #$3f,d5 and.w #$3f,d6 lsl.w #6,d6 or.w d5,d6 move.b (a1,d6.w),d5 add.l d2,d0 move.b (a2,d5.w),(a0)+ add.l d3,d1 move.l a0,d4 subq.l #1,d7 l.skipb9 btst #1,d4 beq.b l.skips9 moveq #2,d4 cmp.l d4,d7 bls.b l.skips9 move.l d0,d5 ; write two pixels move.l d1,d6 swap d5 swap d6 and.w #$3f,d5 and.w #$3f,d6 lsl.w #6,d6 or.w d5,d6 move.b (a1,d6.w),d5 move.w (a2,d5.w),d4 add.l d2,d0 add.l d3,d1 move.l d0,d5 move.l d1,d6 swap d5 swap d6 and.w #$3f,d5 and.w #$3f,d6 lsl.w #6,d6 or.w d5,d6 move.b (a1,d6.w),d5 move.b (a2,d5.w),d4 add.l d2,d0 move.w d4,(a0)+ add.l d3,d1 subq.l #2,d7 l.skips9 move.l d7,d6 ; setup registers and.w #3,d6 move.l d6,a3 eor.w d0,d1 ; swap fraction parts for addx eor.w d2,d3 eor.w d1,d0 eor.w d3,d2 eor.w d0,d1 eor.w d2,d3 swap d0 swap d1 swap d2 swap d3 lsl.w #6,d1 lsl.w #6,d3 moveq #0,d6 moveq #0,d5 sub.l #$f000,a1 lsr.l #2,d7 beq.w l.skip_loop29 subq.l #1,d7 sub.w d3,d1 add.l d3,d1 ; setup the X flag or.w #$ffc0,d0 or.w #$f03f,d1 move.w d0,d6 and.w d1,d6 bra.b l.start_loop29 cnop 0,8 l.loop29 or.w #$ffc0,d0 ; pOEP or.w #$f03f,d1 ; sOEP move.b (a2,d5.l),d4 ; pOEP but allows sOEP move.w d0,d6 ; sOEP and.w d1,d6 ; pOEP move.l d4,(a0)+ ; sOEP l.start_loop29 addx.l d2,d0 ; pOEP only addx.l d3,d1 ; pOEP only move.b (a1,d6.l),d5 ; pOEP but allows sOEP or.w #$ffc0,d0 ; sOEP or.w #$f03f,d1 ; pOEP move.w d0,d6 ; sOEP move.w (a2,d5.l),d4 ; pOEP but allows sOEP and.w d1,d6 ; sOEP addx.l d2,d0 ; pOEP only addx.l d3,d1 ; pOEP only move.b (a1,d6.l),d5 ; pOEP but allows sOEP or.w #$ffc0,d0 ; sOEP or.w #$f03f,d1 ; pOEP move.w d0,d6 ; sOEP move.b (a2,d5.l),d4 ; pOEP but allows sOEP and.w d1,d6 ; sOEP addx.l d2,d0 ; pOEP only addx.l d3,d1 ; pOEP only move.b (a1,d6.l),d5 ; pOEP but allows sOEP or.w #$ffc0,d0 ; sOEP or.w #$f03f,d1 ; pOEP move.w d0,d6 ; sOEP swap d4 ; pOEP only move.w (a2,d5.l),d4 ; pOEP but allows sOEP and.w d1,d6 ; sOEP addx.l d2,d0 ; pOEP only addx.l d3,d1 ; pOEP only move.b (a1,d6.l),d5 ; pOEP but allows sOEP dbf d7,l.loop29 ; pOEP only = 7.75 cycles/pixel move.b (a2,d5.l),d4 move.l d4,(a0)+ l.skip_loop29 sub.w d3,d1 add.l d3,d1 move.l a3,d7 bra.b l.loop_end29 l.loop39 or.w #$ffc0,d0 or.w #$f03f,d1 move.w d0,d6 and.w d1,d6 addx.l d2,d0 addx.l d3,d1 move.b (a1,d6.l),d5 move.b (a2,d5.l),(a0)+ l.loop_end29 dbf d7,l.loop39 l.end29 movem.l (sp)+,d2-d7/a2/a3 rts cnop 0,4 ;----------------------------------------------------------------------- ; 030/040 version _R_DrawSpan_040 @R_DrawSpan_040 movem.l d2-d7/a2-a4,-(sp) move.l _ds_y(a4),d0 move.l _ds_x1(a4),d1 ; dest = ylookup[_ds_y] + columnofs[_ds_x1] movea.l _ylookup2(a4),a0 ; = ylookup2[_ds_y] + _ds_x1 move.l (a0,d0.l*4),a0 ;; movea.l _columnofs(a4),a1 ;; add.l (a1,d1.l*4),a0 adda.l d1,a0 ; new move.l _ds_source(a4),a1 move.l _ds_colormap(a4),a2 move.l _ds_x2(a4),d7 ; count = _ds_x2 - _ds_x1 sub.l d1,d7 addq.l #1,d7 move.l _ds_xfrac(a4),d0 move.l _ds_yfrac(a4),d1 move.l _ds_xstep(a4),d2 move.l _ds_ystep(a4),d3 move.l a0,d4 btst #0,d4 beq.b l.skipb0 move.l d0,d5 ; do the unaligned pixels move.l d1,d6 ; so we can write to longword swap d5 ; boundary in the main loop swap d6 and.w #$3f,d5 and.w #$3f,d6 lsl.w #6,d6 or.w d5,d6 move.b (a1,d6.w),d5 add.l d2,d0 move.b (a2,d5.w),(a0)+ add.l d3,d1 move.l a0,d4 subq.l #1,d7 l.skipb0 btst #1,d4 beq.b l.skips0 moveq #2,d4 cmp.l d4,d7 bls.b l.skips0 move.l d0,d5 ; write two pixels move.l d1,d6 swap d5 swap d6 and.w #$3f,d5 and.w #$3f,d6 lsl.w #6,d6 or.w d5,d6 move.b (a1,d6.w),d5 move.w (a2,d5.w),d4 add.l d2,d0 add.l d3,d1 move.l d0,d5 move.l d1,d6 swap d5 swap d6 and.w #$3f,d5 and.w #$3f,d6 lsl.w #6,d6 or.w d5,d6 move.b (a1,d6.w),d5 move.b (a2,d5.w),d4 add.l d2,d0 move.w d4,(a0)+ add.l d3,d1 subq.l #2,d7 l.skips0 move.l a2,d4 add.l #$1000,a1 ; catch 22 move.l a0,a3 add.l d7,a3 move.l d7,d5 and.b #~3,d5 move.l a0,a4 add.l d5,a4 eor.w d0,d1 ; swap fraction parts for addx eor.w d2,d3 eor.w d1,d0 eor.w d3,d2 eor.w d0,d1 eor.w d2,d3 swap d0 swap d1 swap d2 swap d3 lsl.w #6,d1 lsl.w #6,d3 move.w #$ffc0,d6 move.w #$f03f,d7 lsr.w #2,d5 beq.b l.skip_loop20 sub.w d2,d0 add.l d2,d0 ; setup the X flag l.loop20 or.w d6,d0 or.w d7,d1 and.w d1,d0 addx.l d3,d1 move.b (a1,d0.w),d4 addx.l d2,d0 move.l d4,a2 move.w (a2),d5 or.w d6,d0 or.w d7,d1 and.w d1,d0 addx.l d3,d1 move.b (a1,d0.w),d4 addx.l d2,d0 move.l d4,a2 move.b (a2),d5 swap d5 or.w d6,d0 or.w d7,d1 and.w d1,d0 addx.l d3,d1 move.b (a1,d0.w),d4 addx.l d2,d0 move.l d4,a2 move.w (a2),d5 or.w d6,d0 or.w d7,d1 and.w d1,d0 addx.l d3,d1 move.b (a1,d0.w),d4 addx.l d2,d0 move.l d4,a2 move.b (a2),d5 move.l d5,(a0)+ cmp.l a0,a4 bne.b l.loop20 l.skip_loop20 sub.w d2,d0 add.l d2,d0 bra.b l.loop_end20 l.loop30 or.w d6,d0 or.w d7,d1 and.w d1,d0 addx.l d3,d1 move.b (a1,d0.w),d4 addx.l d2,d0 move.l d4,a2 move.b (a2),(a0)+ l.loop_end20 cmp.l a0,a3 bne.b l.loop30 l.end20 movem.l (sp)+,d2-d7/a2-a4 rts ;----------------------------------------------------------------------- ; R_DrawMaskedColumn (in r_things.c) by Arto Huusko <arto.huusko@pp.qnet.fi> xref _sprtopscreen ;fixed_t xref _spryscale ;fixed_t xref _mfloorclip ;short* xref _mceilingclip ;short* xref _colfunc ;void R_DrawMaskedColumn (column_t* column) ;column_t=post_t= ; byte topdelta ; byte length cnop 0,4 _R_DrawMaskedColumn: @R_DrawMaskedColumn: cmp.b #$FF,(a0) beq.w .rd_Exit movem.l d2-d6/a2/a3,-(sp) move.l _dc_x(a4),d0 moveq #0,d3 move.l _mfloorclip(a4),a1 move.w (a1,d0.l*2),d3 move.l _mceilingclip(a4),a1 move.w (a1,d0.l*2),d4 ext.l d4 move.l _dc_texturemid(a4),d6 ;basetexturemid move.l _spryscale(a4),d5 move.l _colfunc(a4),a3 move.l a0,a2 ;column talteen .rd_Loop2: moveq #0,d2 move.b (a2),d1 extb.l d1 move.b 1(a2),d2 muls.l d5,d1 mulu.l d5,d2 add.l _sprtopscreen(a4),d1 add.l d1,d2 add.l #FRACUNIT-1,d1 swap d1 ;>>FRACBITS ext.l d1 subq.l #1,d2 swap d2 and.l #$FFFF,d2 cmp.w d3,d2 bmi.b .rd_yhok move.l d3,d2 subq.l #1,d2 .rd_yhok: cmp.l d1,d4 bmi.b .rd_ylok move.l d4,d1 addq.l #1,d1 .rd_ylok: move.l d1,_dc_yl(a4) move.l d2,_dc_yh(a4) cmp.w d1,d2 bmi.b .rd_skip move.l a2,_dc_source(a4) addq.l #3,_dc_source(a4) move.l d6,_dc_texturemid(a4) moveq #0,d0 move.b (a2),d0 swap d0 sub.l d0,_dc_texturemid(a4) jsr (a3) .rd_skip: moveq #4,d0 add.b 1(a2),d0 add.l d0,a2 cmp.b #$FF,(a2) bne.b .rd_Loop2 move.l d6,_dc_texturemid(a4) movem.l (sp)+,d2-d6/a2/a3 .rd_Exit: rts ;// ;// R_DrawMaskedColumn ;// Used for sprites and masked mid textures. ;// Masked means: partly transparent, i.e. stored ;// in posts/runs of opaque pixels. ;// ;short* mfloorclip; ;short* mceilingclip; ; ;fixed_t spryscale; ;fixed_t sprtopscreen; ; ;void R_DrawMaskedColumn (column_t* column) ;{ ; int topscreen; ; int bottomscreen; ; fixed_t basetexturemid; ; ; basetexturemid = dc_texturemid; ; ; for ( ; column->topdelta != 0xff ; ) ; { ; // calculate unclipped screen coordinates ; // for post ; topscreen = sprtopscreen + spryscale*column->topdelta; ; bottomscreen = topscreen + spryscale*column->length; ; ; dc_yl = (topscreen+FRACUNIT-1)>>FRACBITS; ; dc_yh = (bottomscreen-1)>>FRACBITS; ; ; if (dc_yh >= mfloorclip[dc_x]) ; dc_yh = mfloorclip[dc_x]-1; ; if (dc_yl <= mceilingclip[dc_x]) ; dc_yl = mceilingclip[dc_x]+1; ; ; if (dc_yl <= dc_yh) ; { ; dc_source = (byte *)column + 3; ; dc_texturemid = basetexturemid - (column->topdelta<<FRACBITS); ; // dc_source = (byte *)column + 3 - column->topdelta; ; ; // Drawn by either R_DrawColumn ; // or (SHADOW) R_DrawFuzzColumn. ; colfunc (); ; } ; column = (column_t *)( (byte *)column + column->length + 4); ; } ; ; dc_texturemid = basetexturemid; ;} ;----------------------------------------------------------------------- ; V_DrawPatch (in v_video.c) by Arto Huusko <arto.huusko@pp.qnet.fi> xref _screens ;byte* screens[5] xref @I_MarkRect STRUCTURE patch,0 WORD width WORD height WORD leftoffset WORD topoffset STRUCT columnofs,9*4 ;nine ints LABEL patch_size ;width equ 0 ;height equ 2 ;leftoffset equ 4 ;topoffset equ 6 ;columnofs equ 8 ;patch_size equ 8+(9*4) column_t STRUCTURE column,0 BYTE topdelta BYTE length LABEL column_size ;topdelta equ 0 ;length equ 1 ;column_size equ 2 cnop 0,4 _V_DrawPatch: @V_DrawPatch: _V_DrawPatchDirect: @V_DrawPatchDirect: movem.l d3-d6/a2/a3/a5,-(sp) move.l d0,d3 ;x move.l d1,d4 ;y.. scrn in (sp), patch in a0 move.l a0,a2 ;Store patch moveq #0,d0 move.w topoffset(a2),d0 rol.w #8,d0 ;SWAPSHORT ext.l d0 sub.l d0,d4 moveq #0,d0 move.w leftoffset(a2),d0 rol.w #8,d0 ;SWAPSHORT ext.l d0 sub.l d0,d3 move.l 32(sp),d6 bne.b .vd_ScrnOK move.l d3,d0 move.l d4,d1 moveq #0,d5 move.w height(a2),d5 rol.w #8,d5 move.l d5,-(sp) move.w width(a2),d5 rol.w #8,d5 move.l d5,-(sp) jsr (@I_MarkRect) addq.l #8,sp .vd_ScrnOK: lea _screens(a4),a0 move.l (a0,d6.l*4),d5 ;Peter... change here (quite obvious) muls.l _SCREENWIDTH(a4),d4 ;y not needed further add.l d3,d5 ;+x add.l d4,d5 ;+y*SCREENWIDTH ;D3=x, D5=desttop, moveq #0,d6 move.w width(a2),d6 rol.w #8,d6 ;SWAPSHORT ;D6=w subq.l #1,d6 ;for ; col<w lea columnofs(a2),a3 ;prepare for columnofs[col] .vd_Loop: move.l (a3)+,d0 rol.w #8,d0 swap d0 rol.w #8,d0 ;three instructions for SWAPLONG move.l a2,a5 ;column=patch+ add.l d0,a5 ;... SWAPLONG(patch->columnofs[col]) cmp.b #$FF,(a5) beq.b .vdl_Next ;last column .vdl_Loop: move.l d5,a1 ;dest=desttop + ;... here are the other references to SCREENWIDTH ; lsl.l #8,x + lsl.l #6,x is equal to 256x+64x=320x moveq #0,d0 move.b (a5),d0 ;column->topdelta* ;;; move.l d0,d1 ;! ;;; lsl.l #8,d0 ;! ;;; lsl.l #6,d1 ;! ;;; add.l d0,a1 ;! ;;; add.l d1,a1 ;! muls.l _SCREENWIDTH(a4),d0 add.l d0,a1 move.b 1(a5),d0 addq.l #3,a5 ;source ;Would it be possible to use the code from DrawColumn functions by Aki ;here, too?? .vdl_DrawLoop: move.b (a5)+,(a1) add.l _SCREENWIDTH(a4),a1 subq.b #1,d0 bne.b .vdl_DrawLoop addq.l #1,a5 ;bump to next column.. ;bumped already by three and length, so one more. (column +=column->length+4) cmp.b #$FF,(a5) bne.b .vdl_Loop .vdl_Next: addq.l #1,d5 dbf d6,.vd_Loop .vd_exit: movem.l (sp)+,d3-d6/a2/a3/a5 rts ;void ;V_DrawPatch ;( int x, ; int y, ; int scrn, ; patch_t* patch ) ;{ ; ; int count; ; int col; ; column_t* column; ; byte* desttop; ; byte* dest; ; byte* source; ; int w; ; ; y -= SWAPSHORT(patch->topoffset); ; x -= SWAPSHORT(patch->leftoffset); ; ; col = 0; ; desttop = screens[scrn]+y*SCREENWIDTH+x; ; ; w = SWAPSHORT(patch->width); ; ; for ( ; col<w ; x++, col++, desttop++) ; { ; column = (column_t *)((byte *)patch + SWAPLONG(patch->columnofs[col])); ; ; // step through the posts in a column ; while (column->topdelta != 0xff ) ; { ; source = (byte *)column + 3; ; dest = desttop + column->topdelta*SCREENWIDTH; ; count = column->length; ; ; while (count--) ; { ; *dest = *source++; ; dest += SCREENWIDTH; ; } ; column = (column_t *)( (byte *)column + column->length ; + 4 ); ; } ; } ; if (!scrn) ; I_MarkRect (x, y, SWAPSHORT(patch->width), SWAPSHORT(patch->height)); ; ;} ;*********************************************************************** end